Table of Contents
$$ \hat{y} = \theta_0 + \theta_{1}x_1 + \theta_{2}x_2 + \theta_{3}x_3 + \cdots $$
$+1 \to$ close to a straight line with positive slope
$-1 \to$ close to a straight line with negative slope
Indicates how close the data are to a straight line, but
gives no information on the steepness of the slope
# Mount Google Drive at /content/drive (Colab-only helper module).
from google.colab import drive
drive.mount('/content/drive')

# Install a global "ignore" filter so no warnings are shown from here on.
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Sample points for the 1-D regression demo; both arrays are (13, 1) column vectors
# (input x, output y).
x = np.array([0.1, 0.4, 0.7, 1.2, 1.3, 1.7, 2.2,
              2.8, 3.0, 4.0, 4.3, 4.4, 4.9])[:, np.newaxis]
y = np.array([0.5, 0.9, 1.1, 1.5, 1.5, 2.0, 2.2,
              2.8, 2.7, 3.0, 3.5, 3.7, 3.9])[:, np.newaxis]
# Scatter plot of the raw (x, y) samples.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.set_title('Linear Regression', fontsize=15)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.plot(x, y, 'ko', label='data')
ax.set_xlim([0, 5])
ax.grid(alpha=0.3)
ax.axis('scaled')  # same data scale on both axes
plt.show()
from sklearn.linear_model import LinearRegression
# Fit a least-squares line to the samples; fit() returns the estimator, so the
# construction and training can be chained.
reg = LinearRegression().fit(x, y)
print(reg.coef_)       # fitted slope
print(reg.intercept_)  # fitted intercept (bias)
# Samples together with the fitted regression line.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.set_title('Linear Regression', fontsize=15)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.plot(x, y, 'ko', label='data')

# Evaluate the fitted line intercept + slope * x on a dense grid over [0, 5).
xp = np.arange(0, 5, 0.01).reshape(-1, 1)
yp = reg.intercept_ + xp*reg.coef_

ax.plot(xp, yp, 'r', linewidth=2, label="$L_2$")
ax.legend(fontsize=15)
ax.axis('scaled')
ax.grid(alpha=0.3)
ax.set_xlim([0, 5])
plt.show()
# Synthetic quadratic data: x is uniform on [-5, 10) and
# y = 10 + x + 2*x^2 plus zero-mean Gaussian noise scaled by 10.
n = 100
x = 15*np.random.rand(n, 1) - 5
noise = 10*np.random.randn(n, 1)
y = 10 + x + 2*np.square(x) + noise
# Scatter plot of the noisy quadratic samples, x-axis clipped to the data range.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.set_title('Nonlinear Regression', fontsize=15)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.plot(x, y, 'o', markersize=4, label='actual')
ax.set_xlim([x.min(), x.max()])
ax.grid(alpha=0.3)
ax.legend(fontsize=15)
plt.show()
from sklearn.preprocessing import PolynomialFeatures
# Expand x into degree-2 polynomial features without the constant column
# (include_bias=False), so the linear model supplies the intercept itself.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
x_poly = poly_features.fit_transform(x)

# Refit the existing `reg` estimator on the expanded features and predict on the
# same training inputs.
p = reg.fit(x_poly, y).predict(x_poly)
# Overlay the model's predictions (red) on the noisy samples.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.set_title('Nonlinear Regression', fontsize=15)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.plot(x, y, 'o', markersize=4, label='actual')
ax.plot(x, p, 'ro', markersize=4, label='predict')
ax.grid(alpha=0.3)
ax.legend(fontsize=15)
ax.set_xlim([x.min(), x.max()])
plt.show()
# Two classes separated by a margin around the line 0.8*x1 + x2 - 3 = 0.
# Points with g0 >= 1 form C1, points with g0 < -1 form C2; the band between is discarded.
x1 = 8*np.random.rand(100, 1)
x2 = 7*np.random.rand(100, 1) - 4

g0 = 0.8*x1 + x2 - 3
g1 = g0 - 1
g2 = g0 + 1

# Row indices of the samples belonging to each class.
C1 = np.flatnonzero(g1 >= 0)
C2 = np.flatnonzero(g2 < 0)

# Per-class feature matrices, one (x1, x2) pair per row.
X1 = np.column_stack((x1[C1], x2[C1]))
X2 = np.column_stack((x1[C2], x2[C2]))

n = len(X1)
m = len(X2)

# Stacked design matrix and column-vector labels: 0 for C1, 1 for C2.
X = np.concatenate((X1, X2), axis=0)
y = np.concatenate((np.zeros((n, 1)), np.ones((m, 1))), axis=0)
# Scatter plot of the two classes (C1 red, C2 blue).
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(x1[C1], x2[C1], 'ro', label='C1')
ax.plot(x1[C2], x2[C2], 'bo', label='C2')
ax.set_xlabel('$x_1$', fontsize=20)
ax.set_ylabel('$x_2$', fontsize=20)
ax.legend(loc=4)
ax.set_xlim([0, 8])
ax.set_ylim([-4, 3])
plt.show()
from sklearn.svm import SVC
# Train a linear-kernel support vector classifier on the stacked samples.
clf = SVC(kernel='linear')
# The labels are stored as an (n_samples, 1) column; flatten them to the 1-D vector
# fit() expects so no shape-conversion warning is raised.
clf.fit(X, y.ravel())
print(clf.coef_)       # weights of the separating hyperplane
print(clf.intercept_)  # offset of the separating hyperplane
# Decision boundary w1*x1 + w2*x2 + b = 0, solved for x2 over the plotted x1 range.
xp = np.linspace(0, 8, 100).reshape(-1, 1)
yp = -clf.coef_[0, 0]/clf.coef_[0, 1]*xp - clf.intercept_/clf.coef_[0, 1]

plt.figure(figsize=(10, 6))
# The first n rows of X are class C1; every remaining row is C2. Slice to the end
# of the array so the final C2 sample is plotted as well.
plt.plot(X[:n, 0], X[:n, 1], 'ro', label='C1')
plt.plot(X[n:, 0], X[n:, 1], 'bo', label='C2')
plt.plot(xp, yp, '--k', label='SVM')
plt.xlabel('$x_1$', fontsize=20)
plt.ylabel('$x_2$', fontsize=20)
plt.legend(loc=4)
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
# Two Gaussian blobs with identity covariance: class 0 centred at (0, 0),
# class 1 centred at (10, 10), m samples each.
m = 500
X0 = np.random.multivariate_normal([0, 0], np.identity(2), m)
X1 = np.random.multivariate_normal([10, 10], np.identity(2), m)

# Stack features row-wise; labels are a (2m, 1) column of 0s followed by 1s.
X = np.concatenate((X0, X1), axis=0)
y = np.concatenate((np.zeros((m, 1)), np.ones((m, 1))), axis=0)
# Scatter plot of the two Gaussian classes.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(X0[:, 0], X0[:, 1], '.b', label='Class 0')
ax.plot(X1[:, 0], X1[:, 1], '.k', label='Class 1')
ax.set_title('Data Classes', fontsize=15)
ax.legend(loc='lower right', fontsize=15)
ax.set_xlabel('X1', fontsize=15)
ax.set_ylabel('X2', fontsize=15)
ax.set_xlim([-10, 20])
ax.set_ylim([-4, 14])
ax.grid(alpha=0.3)
plt.show()
from sklearn.linear_model import LogisticRegression
# Train a logistic-regression classifier on the two Gaussian classes.
clf = LogisticRegression()
# The labels are stored as an (n_samples, 1) column; flatten them to the 1-D vector
# fit() expects so no shape-conversion warning is raised.
clf.fit(X, y.ravel())
print(clf.coef_)       # feature weights
print(clf.intercept_)  # bias term
# Decision boundary w1*x1 + w2*x2 + b = 0, solved for x2 across the plotted x1 range.
xp = np.linspace(-10, 20, 100).reshape(-1, 1)
yp = -clf.coef_[0, 0]/clf.coef_[0, 1]*xp - clf.intercept_/clf.coef_[0, 1]

plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(X0[:, 0], X0[:, 1], '.b', label='Class 0')
ax.plot(X1[:, 0], X1[:, 1], '.k', label='Class 1')
ax.plot(xp, yp, '--k', label='Logistic')
ax.set_xlim([-10, 20])
ax.set_ylim([-4, 14])
ax.set_title('Data Classes', fontsize=15)
ax.legend(loc='lower right', fontsize=15)
ax.set_xlabel('X1', fontsize=15)
ax.set_ylabel('X2', fontsize=15)
ax.grid(alpha=0.3)
plt.show()

# Class-membership probabilities for the single query point (0, 6);
# the bare name on the last line displays the result in the notebook.
pred = clf.predict_proba([[0, 6]])
pred
Data clustering is an unsupervised learning problem
Given:
# Three unlabeled Gaussian blobs (identity covariance, m points each),
# stacked into a single (3m, 2) array for clustering.
m = 200
X0 = np.random.multivariate_normal([-1, 1], np.identity(2), m)
X1 = np.random.multivariate_normal([15, 10], np.identity(2), m)
X2 = np.random.multivariate_normal([0, 6], np.identity(2), m)
X = np.concatenate((X0, X1, X2), axis=0)
# All points in one colour: the cluster membership is not given to the algorithm.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(X[:, 0], X[:, 1], '.b')
ax.set_xlim([-10, 20])
ax.set_ylim([-4, 14])
ax.grid(alpha=0.3)
plt.show()
from sklearn.cluster import KMeans
# Cluster the points into three groups; random_state pins the initialisation.
# fit() returns the estimator, so construction and fitting are chained.
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)
print(kmeans.labels_)  # cluster index assigned to each row of X
# Colour each point by its assigned cluster index (0 green, 1 black, 2 red).
plt.figure(figsize=(10, 6))
ax = plt.gca()
for cluster_id, marker in enumerate(('g.', 'k.', 'r.')):
    members = kmeans.labels_ == cluster_id
    ax.plot(X[members, 0], X[members, 1], marker, label=cluster_id)
ax.set_xlim([-10, 20])
ax.set_ylim([-4, 14])
ax.grid(alpha=0.3)
ax.legend(loc='lower right', fontsize=15)
plt.show()
Each example $x$ has 2 features $\{x_1,x_2\}$
Consider ignoring the feature $x_2$ for each example
Each 2-dimensional example $x$ now becomes 1-dimensional $x = \{x_1\}$
Are we losing much information by throwing away $x_2$ ?
No. Most of the data spread is along $x_1$ (very little variance along $x_2$)
Each example $x$ has 2 features $\{x_1,x_2\}$
Consider ignoring the feature $x_2$ for each example
Each 2-dimensional example $x$ now becomes 1-dimensional $x = \{x_1\}$
Are we losing much information by throwing away $x_2$ ?
Yes, the data has substantial variance along both features (i.e. both axes)
Now consider a change of axes
Each example $x$ has 2 features $\{u_1,u_2\}$
Consider ignoring the feature $u_2$ for each example
Each 2-dimensional example $x$ now becomes 1-dimensional $x = \{u_1\}$
Are we losing much information by throwing away $u_2$ ?
No. Most of the data spread is along $u_1$ (very little variance along $u_2$)
# Correlated 2-D Gaussian samples: zero mean, covariance `sigma`, m points.
m = 5000
mu = np.array([0, 0])
sigma = np.array([
    [3, 1.5],
    [1.5, 1],
])
X = np.random.multivariate_normal(mu, sigma, m)
# Scatter plot of the correlated samples with equal axis scaling.
fig = plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(X[:, 0], X[:, 1], 'k.')
ax.axis('equal')
plt.show()
from sklearn.decomposition import PCA
# Fit a 2-component PCA; fit() returns the estimator, so the calls are chained.
pca = PCA(n_components=2).fit(X)

# Stem plot of the explained-variance ratio of component 1 and component 2.
# NOTE(review): the plotted ratios lie in [0, 1] although the title says '%'.
plt.figure()
ax = plt.gca()
ax.stem(range(1, 3), pca.explained_variance_ratio_)
ax.set_xlim([0.5, 2.5])
ax.set_ylim([0, 1])
ax.set_title('Score (%)')
plt.show()
# Direction of the first principal component and the slope of the line through
# the origin along that direction.
principal_axis = pca.components_[0]
h = principal_axis[1]/principal_axis[0]

# Points on that line for x in [-6, 6].
xp = np.linspace(-6, 6, 200)
yp = h*xp

plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.plot(X[:, 0], X[:, 1], 'k.')
ax.plot(xp, yp, 'r.')
ax.axis('equal')
plt.show()
from sklearn import tree
# Small integer-coded table: the first four columns are features,
# the last column is the class label.
data = np.array([
    [0, 0, 1, 0, 0],
    [1, 0, 2, 0, 0],
    [0, 1, 2, 0, 1],
    [2, 1, 0, 2, 1],
    [0, 1, 0, 1, 1],
    [1, 1, 1, 2, 0],
    [1, 1, 0, 2, 0],
    [0, 0, 2, 1, 0],
])

x = data[:, :-1]  # feature columns
y = data[:, -1]   # label column

print(x, '\n')
print(y)
# Entropy-criterion decision tree limited to depth 3; random_state fixes tie-breaking.
clf = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
).fit(x, y)

# Classify one encoded sample. Original note: [?, Yes, Low, Medium] -- presumably the
# categorical meaning of the four feature codes; TODO confirm the encoding.
clf.predict([[0, 0, 1, 0]])
# Hand-picked 2-D samples for a two-class problem: X1 holds class-1 points and
# X0 holds class-0 points, one (x1, x2) pair per row.
# They are kept as plain ndarrays: np.matrix is discouraged by NumPy and is
# rejected by recent scikit-learn estimators.
X1 = np.array([[-1.1, 0], [-0.3, 0.1], [-0.9, 1], [0.8, 0.4], [0.4, 0.9], [0.3, -0.6],
               [-0.5, 0.3], [-0.8, 0.6], [-0.5, -0.5]])
X0 = np.array([[-1, -1.3], [-1.6, 2.2], [0.9, -0.7], [1.6, 0.5], [1.8, -1.1], [1.6, 1.6],
               [-1.6, -1.7], [-1.4, 1.8], [1.6, -0.9], [0, -1.6], [0.3, 1.7], [-1.6, 0],
               [-2.1, 0.2]])

plt.figure(figsize=(10, 8))
plt.plot(X1[:, 0], X1[:, 1], 'ro', label='C1')
plt.plot(X0[:, 0], X0[:, 1], 'bo', label='C0')
plt.title('SVM for Nonlinear Data', fontsize=15)
plt.xlabel(r'$x_1$', fontsize=15)
plt.ylabel(r'$x_2$', fontsize=15)
plt.legend(loc=1, fontsize=12)
plt.axis('equal')
plt.show()
# Number of class-1 and class-0 samples.
N = X1.shape[0]
M = X0.shape[0]

# Stack both classes into one design matrix. np.asarray forces a plain ndarray even
# when the inputs are np.matrix objects, which recent scikit-learn releases reject.
X = np.asarray(np.vstack([X1, X0]))
# Column-vector labels: 1 for the X1 rows, 0 for the X0 rows.
y = np.vstack([np.ones([N, 1]), np.zeros([M, 1])])

# Entropy-criterion decision tree limited to depth 4.
clf = tree.DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=0)
clf.fit(X, y)

# Predicted class for the query point (0, 1); displayed as the notebook cell output.
clf.predict([[0, 1]])
# Evaluate the classifier on a regular grid over [-3, 3) x [-3, 3) and shade the
# grid points predicted as class 1.
X1gr, X2gr = np.meshgrid(np.arange(-3, 3, 0.1), np.arange(-3, 3, 0.1))
# One (x1, x2) grid point per row, kept as a plain ndarray: recent scikit-learn
# releases refuse np.matrix input in predict().
Xp = np.column_stack([X1gr.ravel(), X2gr.ravel()])

q = clf.predict(Xp).reshape(-1, 1)
C1 = np.where(q == 1)[0]  # row indices of grid points classified as class 1

plt.figure(figsize=(10, 8))
plt.plot(X1[:, 0], X1[:, 1], 'ro', label='C1')
plt.plot(X0[:, 0], X0[:, 1], 'bo', label='C0')
plt.plot(Xp[C1, 0], Xp[C1, 1], 'gs', markersize=8, alpha=0.1, label='Decision Tree')
plt.xlabel(r'$x_1$', fontsize=15)
plt.ylabel(r'$x_2$', fontsize=15)
plt.legend(loc=1, fontsize=12)
plt.axis('equal')
plt.show()
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
## generate three simulated clusters
# Class means.
mu1 = np.array([1, 7])
mu2 = np.array([3, 4])
mu3 = np.array([6, 5])

# Class covariances (each a scaled symmetric 2x2 matrix).
SIGMA1 = 0.8*np.array([[1, 1.5], [1.5, 3]])
SIGMA2 = 0.5*np.array([[2, 0], [0, 2]])
SIGMA3 = 0.5*np.array([[1, -1], [-1, 2]])

# 100 Gaussian samples per class.
X1 = np.random.multivariate_normal(mu1, SIGMA1, 100)
X2 = np.random.multivariate_normal(mu2, SIGMA2, 100)
X3 = np.random.multivariate_normal(mu3, SIGMA3, 100)

# Column-vector labels 1, 2 and 3 for the three classes.
y1 = np.full((100, 1), 1.0)
y2 = np.full((100, 1), 2.0)
y3 = np.full((100, 1), 3.0)
# Scatter plot of the three generated classes in the default colour cycle.
plt.figure(figsize=(10, 8))
ax = plt.gca()
ax.set_title('Generated Data', fontsize=15)
for samples, name in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
    ax.plot(samples[:, 0], samples[:, 1], '.', label=name)
ax.set_xlabel('$X_1$', fontsize=15)
ax.set_ylabel('$X_2$', fontsize=15)
ax.legend(fontsize=12)
ax.axis('equal')
ax.grid(alpha=0.3)
ax.axis([-2, 10, 0, 12])  # explicit limits applied after the equal-scaling request
plt.show()
# Stack the three classes into one design matrix with matching column-vector labels.
X = np.concatenate((X1, X2, X3), axis=0)
y = np.concatenate((y1, y2, y3), axis=0)

# Entropy-criterion decision tree limited to depth 3; fit() returns the estimator.
clf = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=42,
).fit(X, y)
# Evaluate the fitted classifier on a regular grid covering the plotting window
# and shade every grid point by its predicted class.
res = 0.3  # grid spacing
X1gr, X2gr = np.meshgrid(np.arange(-2, 10, res), np.arange(0, 12, res))
# One (x1, x2) grid point per row, kept as a plain ndarray: recent scikit-learn
# releases refuse np.matrix input in predict().
Xp = np.column_stack([X1gr.ravel(), X2gr.ravel()])

q = clf.predict(Xp).reshape(-1, 1)

# Row indices of the grid points assigned to each class label.
C1 = np.where(q == 1)[0]
C2 = np.where(q == 2)[0]
C3 = np.where(q == 3)[0]

plt.figure(figsize=(10, 8))
plt.plot(X1[:, 0], X1[:, 1], '.', label='C1')
plt.plot(X2[:, 0], X2[:, 1], '.', label='C2')
plt.plot(X3[:, 0], X3[:, 1], '.', label='C3')
# Translucent squares mark the predicted region of each class.
plt.plot(Xp[C1, 0], Xp[C1, 1], 's', color='blue', markersize=8, alpha=0.1)
plt.plot(Xp[C2, 0], Xp[C2, 1], 's', color='orange', markersize=8, alpha=0.1)
plt.plot(Xp[C3, 0], Xp[C3, 1], 's', color='green', markersize=8, alpha=0.1)
plt.xlabel('$X_1$', fontsize=15)
plt.ylabel('$X_2$', fontsize=15)
plt.legend(fontsize=12)
plt.axis('equal')
plt.grid(alpha=0.3)
plt.axis([-2, 10, 0, 12])
plt.show()
from sklearn import ensemble
# Random forest of 100 depth-3 trees; random_state makes the ensemble reproducible.
clf = ensemble.RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
# The labels are stored as an (n_samples, 1) column; flatten them to the 1-D vector
# fit() expects so no shape-conversion warning is raised.
clf.fit(X, np.ravel(y))
# Evaluate the fitted classifier on a regular grid covering the plotting window
# and shade every grid point by its predicted class.
res = 0.3  # grid spacing
X1gr, X2gr = np.meshgrid(np.arange(-2, 10, res), np.arange(0, 12, res))
# One (x1, x2) grid point per row, kept as a plain ndarray: recent scikit-learn
# releases refuse np.matrix input in predict().
Xp = np.column_stack([X1gr.ravel(), X2gr.ravel()])

q = clf.predict(Xp).reshape(-1, 1)

# Row indices of the grid points assigned to each class label.
C1 = np.where(q == 1)[0]
C2 = np.where(q == 2)[0]
C3 = np.where(q == 3)[0]

plt.figure(figsize=(10, 8))
plt.plot(X1[:, 0], X1[:, 1], '.', label='C1')
plt.plot(X2[:, 0], X2[:, 1], '.', label='C2')
plt.plot(X3[:, 0], X3[:, 1], '.', label='C3')
# Translucent squares mark the predicted region of each class.
plt.plot(Xp[C1, 0], Xp[C1, 1], 's', color='blue', markersize=8, alpha=0.1)
plt.plot(Xp[C2, 0], Xp[C2, 1], 's', color='orange', markersize=8, alpha=0.1)
plt.plot(Xp[C3, 0], Xp[C3, 1], 's', color='green', markersize=8, alpha=0.1)
plt.xlabel('$X_1$', fontsize=15)
plt.ylabel('$X_2$', fontsize=15)
plt.legend(fontsize=12)
plt.axis('equal')
plt.grid(alpha=0.3)
plt.axis([-2, 10, 0, 12])
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# Load the MNIST train/test split bundled with Keras.
mnist = tf.keras.datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Divide by 255 (presumably mapping 0-255 pixel values into [0, 1] -- confirm dtype).
train_x = train_x/255.0
test_x = test_x/255.0
# Display the training image at index 5 in grayscale, without axis ticks.
img = train_x[5].reshape(28, 28)

plt.figure(figsize=(6, 6))
ax = plt.gca()
ax.imshow(img, cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
plt.show()
# Fully connected classifier: flatten the 28x28 input, one 100-unit ReLU hidden
# layer, and a 10-way softmax output.
layers = tf.keras.layers
model = tf.keras.models.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(units=100, activation='relu'),
    layers.Dense(units=10, activation='softmax'),
])

# Integer class labels are used directly, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 epochs; `loss` holds whatever fit() returns, not a scalar loss value.
loss = model.fit(train_x, train_y, epochs=5)

# Loss and accuracy on the held-out test split.
test_loss, test_acc = model.evaluate(test_x, test_y)
# Pick one random test image; indexing with a length-1 index array keeps the batch axis.
picked = np.random.choice(len(test_x), 1)
test_img = test_x[picked]

# Per-class scores for that image and the index of the highest-scoring class.
predict = model.predict_on_batch(test_img)
mypred = np.argmax(predict, axis=1)

# Left: the image itself. Right: stem plot of the 10 class scores.
plt.figure(figsize=(12, 5))
ax_img = plt.subplot(1, 2, 1)
ax_img.imshow(test_img.reshape(28, 28), cmap='gray')
ax_img.axis('off')
ax_scores = plt.subplot(1, 2, 2)
ax_scores.stem(predict[0])
plt.show()

print('Prediction : {}'.format(mypred[0]))
%%javascript
// Fetch and run the notebook table-of-contents helper script from a remote URL.
// NOTE(review): this executes third-party JavaScript on every run -- confirm the source is trusted.
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')